library(knitr)
library(readxl)
library(openxlsx)
library(dplyr)
library(tidyr)
library(DescTools)
library(DT)
library(ggplot2)
library(ggiraph)
library(gridExtra)
library(plotly)
LibrariesVersions <- data.frame(
LIBRARY = c("KNITR", "READXL", "OPENXLSX", "DPLYR", "TIDYR",
"DESCTOOLS", "DT", "GGPLOT2", "GGIRAPH", "GRIDEXTRA", "PLOTLY"),
VERSION = c(as.character(packageVersion("knitr")), as.character(packageVersion("readxl")),
as.character(packageVersion("openxlsx")), as.character(packageVersion("dplyr")),
as.character(packageVersion("tidyr")), as.character(packageVersion("DescTools")),
as.character(packageVersion("DT")), as.character(packageVersion("ggplot2")),
as.character(packageVersion("ggiraph")), as.character(packageVersion("gridExtra")),
as.character(packageVersion("plotly"))))
datatable(LibrariesVersions, options = list(scrollX = TRUE, autoWidth = TRUE),
caption = paste("Libraries Versions | R",
gsub("^R version\\s*|\\s*\\(\\d{4}-\\d{2}-\\d{2} ucrt\\)", "", R.version.string)),
style = "bootstrap", rownames = FALSE) %>%
formatStyle(columns = 1, textAlign = "left") %>% formatStyle(columns = 2, textAlign = "center")
ISO 3166-1 is an international standard from International OrGanization for Standardization (ISO), with 3 codes:
NUMERIC CODE \(\equiv\) M49 CODE from United Nations Statistic Division (UNSD). ISO 3166-1 is available for autonomous countries or territories, while M49 is also available for world or continents.
How to obtain a code? Be a United Nations (UN) or a UN Specialized AGencies (FAO | World Bank | WHO | …) Member State.
There are more NUMERIC/M49 CODE (249) than countries in UN (198).
Data are taken from UN | PoPulation Division. Estimates of the number (or “stock”) of international miGrants (1990-2020) are available for 235 countries/areas of the world. The international miGrant population corresponds to the total number of miGrants present in a Given countrY at a Given moment in time (foreiGn-born). Tourists are not counted amonG miGrants.
Data are orGanized on the basis of
Residence-Birth pairs accordinG to certain
classifications or countries/areas:
Remark: Income Levels are not available for all countries/areas.
International MiGrant Stocks (IMS) in the World
(Residence) born in Europe and Northern America
(Birth) are shown in this table. We have estimates from
1990 to 2020 (Year).
#Load XLSX File
IMS_Total <- read_xlsx(
"../Données/IMS_from_ONU/ims_sex_residence_and_birth_2020.xlsx", sheet = "Table 1", skip = 8) %>%
select(2, 4, 6:14) %>%
rename(Residence = 1, ResidenceCode = 2, Birth = 3, BirthCode = 4,
`1990` = 5, `1995` = 6, `2000` = 7, `2005` = 8, `2010` = 9, `2015` = 10, `2020` = 11)
#One line (with all Years) for each Residence-Birth to one line/Year for each Residence-Birth
IMS_Total <- pivot_longer(IMS_Total, cols = 5:11) %>% rename(Year = 5, IMS_Total = 6)
#IMS_Male
IMS_Male <- read_xlsx(
"../Données/IMS_from_ONU/ims_sex_residence_and_birth_2020.xlsx", sheet = "Table 1", skip = 8) %>%
select(2, 4, 6:7, 15:21) %>%
rename(Residence = 1, ResidenceCode = 2, Birth = 3, BirthCode = 4,
`1990` = 5, `1995` = 6, `2000` = 7, `2005` = 8, `2010` = 9, `2015` = 10, `2020` = 11)
IMS_Male <- pivot_longer(IMS_Male, cols = 5:11) %>% rename(Year = 5, IMS_Male = 6)
#IMS_Female
IMS_Female <- read_xlsx(
"../Données/IMS_from_ONU/ims_sex_residence_and_birth_2020.xlsx", sheet = "Table 1", skip = 8) %>%
select(2, 4, 6:7, 22:28) %>%
rename(Residence = 1, ResidenceCode = 2, Birth = 3, BirthCode = 4,
`1990` = 5, `1995` = 6, `2000` = 7, `2005` = 8, `2010` = 9, `2015` = 10, `2020` = 11)
IMS_Female <- pivot_longer(IMS_Female, cols = 5:11) %>% rename(Year = 5, IMS_Female = 6)
#Combine IMS_Total | IMS_Male | IMS_Female
IMS <- cbind(IMS_Total, IMS_Male[length(IMS_Male)], IMS_Female[length(IMS_Female)])
#Example: World-(Europe and NA)
kable(IMS[57:63, c(1, 3, 5:8)], row.names = FALSE, align = c("l", "l", "c", "c", "c", "c"),
caption = "World's MiGration Stocks born in Europe and Northern America (1990-2020)")
| Residence | Birth | Year | IMS_Total | IMS_Male | IMS_Female |
|---|---|---|---|---|---|
| WORLD | Europe and Northern America | 1990 | 50532413 | 23704534 | 26827879 |
| WORLD | Europe and Northern America | 1995 | 51582164 | 24148758 | 27433406 |
| WORLD | Europe and Northern America | 2000 | 52877703 | 24676104 | 28201599 |
| WORLD | Europe and Northern America | 2005 | 54849090 | 25682798 | 29166292 |
| WORLD | Europe and Northern America | 2010 | 58681104 | 27337252 | 31343852 |
| WORLD | Europe and Northern America | 2015 | 60592402 | 28229706 | 32362696 |
| WORLD | Europe and Northern America | 2020 | 67601621 | 31770385 | 35831236 |
This was an example between World-(Europe and NA). Data are also available for more areas/countries (282).
#Residence(s) in XLSX
ResidencesIMS <- unique(IMS[c("Residence", "ResidenceCode")])
datatable(ResidencesIMS, options = list(scrollX = TRUE, autoWidth = TRUE),
caption = "Residences in IMS DATA", style = "bootstrap", rownames = FALSE) %>%
formatStyle(columns = 1, textAlign = "left", `text-transform` = "uppercase") %>%
formatStyle(columns = 2, textAlign = "center")
A few minor alterations have been made to Residence and
Birth columns.
#Columns: Residence | Birth
IMS.DATA <- IMS %>%
mutate_if(is.character, ~case_when(
. == "Oceania (excluding Australia and New Zealand)" ~ "Oceania (Australia and New Zealand excluded)",
. == "Developed regions" ~ "More Developed Countries",
. == "Less developed regions" ~ "Less Developed Countries",
. == "Less developed regions, excluding least developed countries" ~
"Less Developed Countries (Least Developed Countries excluded)",
. == "Less developed regions, excluding China" ~ "Less Developed Countries (China excluded)",
. == "Least developed countries" ~ "Least Developed Countries (LDC)",
. == "Land-locked Developing Countries (LLDC)" ~ "Land-Locked Developing Countries (LLDC)",
. == "Small island developing States (SIDS)" ~ "Small Island Developing States (SIDS)",
. == "High-income countries" ~ "High-Income Countries",
. == "Middle-income countries" ~ "Middle-Income Countries",
. == "Upper-middle-income countries" ~ "Upper-Middle-Income Countries",
. == "Lower-middle-income countries" ~ "Lower-Middle-Income Countries",
. == "Low-income countries" ~ "Low-Income Countries",
. == "Bolivia (Plurinational State of)" ~ "Bolivia",
. == "China, Hong Kong SAR*" ~ "Hong Kong SAR",
. == "China, Macao SAR*" ~ "Macao SAR",
. == "China, Taiwan Province of China*" ~ "Taiwan (Province of China)",
. == "Iran (Islamic Republic of)" ~ "Iran",
. == "Micronesia (Fed. States of)" ~ "Micronesia",
. == "Saint Helena*" ~ "Saint Helena, Ascension and Tristan da Cunha",
. == "Saint Martin (French part)*" ~ "Saint Martin",
. == "Sint Maarten (Dutch part)*" ~ "Sint Maarten",
. == "Turkey" ~ "Türkiye",
. == "United Kingdom*" ~ "United Kingdom of Great Britain and Northern Ireland",
. == "Venezuela (Bolivarian Republic of)" ~ "Venezuela", TRUE ~ .)) %>%
mutate(across(c(Residence, Birth), ~gsub("\\*", "", .)))
Remark: STOCKs constitute a fraction of the information, we need to know SHAREs.
We need to \(\sum\) IMS from these 6 REGIONs to calculate SHAREs.
#Keep IMS from CRs (6)
IMS.FDATA <- IMS.DATA %>% filter(Residence == "WORLD", BirthCode %in% c(903, 935, 908, 904, 905, 909))
#Make ∑/Year
IMS.FDATA_CRs <- IMS.FDATA %>% group_by(Year) %>%
summarize(IMS_Total_CRs = sum(IMS_Total), IMS_Male_CRs = sum(IMS_Male), IMS_Female_CRs = sum(IMS_Female))
#Check Correct ∑
IMS.DATA_World <- IMS.DATA %>% filter(Residence == "WORLD", Birth == "WORLD")
#∑/Year !Correct => Omitted DATA
IMS.FDATA_CRs <- merge(IMS.FDATA_CRs, IMS.DATA_World, by = "Year") %>%
rename(IMS_Total_World = IMS_Total, IMS_Male_World = IMS_Male, IMS_Female_World = IMS_Female) %>%
mutate(Omitted_Total_SHARE = round(((IMS_Total_World - IMS_Total_CRs) / IMS_Total_World) * 100, 3),
Omitted_Male_SHARE = round(((IMS_Male_World - IMS_Male_CRs) / IMS_Male_World) * 100, 3),
Omitted_Female_SHARE = round(((IMS_Female_World - IMS_Female_CRs) / IMS_Female_World) * 100, 3)) %>%
select(5:8, 1, 9, 2, 12, 10, 3, 13, 11, 4, 14)
#Table World-World
datatable(IMS.FDATA_CRs[, c(1, 3, 5:14)], options = list(scrollX = TRUE, autoWidth = TRUE),
style = "bootstrap", rownames = FALSE, caption = "World's Total MiGration Stocks (1990-2020)") %>%
formatStyle(columns = c(1, 2), textAlign = "left") %>% formatStyle(columns = c(3:14), textAlign = "center")
IMS_Total_CRs \(<\)
World-World IMS (IMS_Total_World). There is 4-5.5% omitted
data (Omitted_Total_SHARE) in IMS_Total_CRs.
Where are these 4-5.5%? XLSX file include a row with IMS
born in Other for countries but not for REGIONs, so we
\(\sum\) all these Other
rows (IMS_Total_OTHER). IMS_Total_OTHER match
Omitted_Total_SHARE?
#Keep Other/Countries
IMS.DATA_OTHERs <- IMS.DATA %>% filter(Birth == "Other")
#∑/Year
IMS.DATA_OTHER <- IMS.DATA_OTHERs %>% group_by(Year) %>%
summarize(IMS_Total_OTHER = sum(IMS_Total), IMS_Male_OTHER = sum(IMS_Male), IMS_Female_OTHER = sum(IMS_Female))
#Variable(s)...
IMS.DATA_OTHER <- IMS.DATA_OTHER %>%
mutate(
Residence = IMS.FDATA_CRs$Residence,
ResidenceCode = IMS.FDATA_CRs$ResidenceCode,
Birth = "OTHER", BirthCode = 2003) %>% select(5:8, everything())
#Table World-OTHER
kable(IMS.DATA_OTHER[ , c(1, 3, 5:8)], row.names = FALSE,
align = c("l", "l", "c", "c", "c", "c"),
caption = "World's MiGration Stocks born in OTHER (1990-2020)")
| Residence | Birth | Year | IMS_Total_OTHER | IMS_Male_OTHER | IMS_Female_OTHER |
|---|---|---|---|---|---|
| WORLD | OTHER | 1990 | 8640334 | 4408990 | 4231344 |
| WORLD | OTHER | 1995 | 7477497 | 3776224 | 3701273 |
| WORLD | OTHER | 2000 | 7063730 | 3593958 | 3469772 |
| WORLD | OTHER | 2005 | 7820665 | 4096864 | 3723801 |
| WORLD | OTHER | 2010 | 8757236 | 4601492 | 4155744 |
| WORLD | OTHER | 2015 | 10918842 | 5685512 | 5233330 |
| WORLD | OTHER | 2020 | 12657151 | 6533198 | 6123953 |
IMS_Total_World = IMS_Total_CRs +
IMS_Total_OTHER. We can include a seventh REGION called
OTHER to make our visualization. Now, we are able to
calculate SHAREs for REGIONs (IMS_Total_SHARE).
#DATA MANIPULATION...
IMS.DATA_OTHER <- IMS.DATA_OTHER %>%
rename(IMS_Total = IMS_Total_OTHER, IMS_Male = IMS_Male_OTHER, IMS_Female = IMS_Female_OTHER)
IMS.FDATA <- rbind(IMS.FDATA, IMS.DATA_OTHER)
#Calculate SHARE/IMS_Total
SHAREs <- list()
for (ROW in 1:nrow(IMS.FDATA)) {
YYYY <- IMS.FDATA$Year[ROW]
IMS <- IMS.FDATA_CRs$IMS_Total_World[IMS.FDATA_CRs$Year == YYYY]
SHARE <- round((IMS.FDATA$IMS_Total[ROW] / IMS) * 100, 5)
SHAREs[[ROW]] <- SHARE}
IMS.FDATA$IMS_Total_SHARE <- unlist(SHAREs)
#Calculate SHARE/IMS_Male
SHAREs <- list()
for (ROW in 1:nrow(IMS.FDATA)) {
YYYY <- IMS.FDATA$Year[ROW]
IMS <- IMS.FDATA_CRs$IMS_Male_World[IMS.FDATA_CRs$Year == YYYY]
SHARE <- round((IMS.FDATA$IMS_Male[ROW] / IMS) * 100, 5)
SHAREs[[ROW]] <- SHARE}
IMS.FDATA$IMS_Male_SHARE <- unlist(SHAREs)
#Calculate SHARE/IMS_Female
SHAREs <- list()
for (ROW in 1:nrow(IMS.FDATA)) {
YYYY <- IMS.FDATA$Year[ROW]
IMS <- IMS.FDATA_CRs$IMS_Female_World[IMS.FDATA_CRs$Year == YYYY]
SHARE <- round((IMS.FDATA$IMS_Female[ROW] / IMS) * 100, 5)
SHAREs[[ROW]] <- SHARE}
IMS.FDATA$IMS_Female_SHARE <- unlist(SHAREs)
#Table World-(Europe and NA)
kable(IMS.FDATA[15:21, c(1, 3, 5:11)], row.names = FALSE,
align = c("l", "l", "c", "c", "c", "c", "c", "c", "c"),
caption = "World's MiGration Stocks (%) born in EUROPE (1990-2020)",
digits = c(0, 0, 0, 0, 0, 0, 3, 3, 3))
| Residence | Birth | Year | IMS_Total | IMS_Male | IMS_Female | IMS_Total_SHARE | IMS_Male_SHARE | IMS_Female_SHARE |
|---|---|---|---|---|---|---|---|---|
| WORLD | EUROPE | 1990 | 47730278 | 22438898 | 25291380 | 31.199 | 28.930 | 33.533 |
| WORLD | EUROPE | 1995 | 48636003 | 22816174 | 25819829 | 30.154 | 27.967 | 32.393 |
| WORLD | EUROPE | 2000 | 49727784 | 23247653 | 26480131 | 28.706 | 26.502 | 30.967 |
| WORLD | EUROPE | 2005 | 51336984 | 24061192 | 27275792 | 26.815 | 24.637 | 29.084 |
| WORLD | EUROPE | 2010 | 54711301 | 25508453 | 29202848 | 24.758 | 22.387 | 27.282 |
| WORLD | EUROPE | 2015 | 56577753 | 26369327 | 30208426 | 22.817 | 20.563 | 25.232 |
| WORLD | EUROPE | 2020 | 63273663 | 29748898 | 33524765 | 22.550 | 20.424 | 24.844 |
IMS_Total_SHARE in World born in EUROPE is down, from
31.20% in 1990 to 22.55% in 2020.
To visualize IMS DATA (STOCKs and SHAREs), we will use 4 libraries
(ggplot2 | ggplotly | ggiraph |
plotly).
We will use LINEs and STACKEd BARs to illustrate IMS Evolution (in Millions) from 1990 to 2020. STACKEd BARs will show IMS Evolution in a Residence Area from \(n\) Birth Areas.
#N°1
LINEs <- ggplot(IMS.FDATA_CRs, aes(x = Year)) +
geom_line(aes(y = IMS_Total_World, color = "IMS_Total_World", group = "IMS_Total_World")) +
geom_line(aes(y = IMS_Male_World, color = "IMS_Male_World", group = "IMS_Male_World")) +
geom_line(aes(y = IMS_Female_World, color = "IMS_Female_World", group = "IMS_Female_World")) +
geom_point(aes(y = IMS_Total_World, color = "IMS_Total_World", group = "IMS_Total_World")) +
geom_point(aes(y = IMS_Male_World, color = "IMS_Male_World", group = "IMS_Male_World")) +
geom_point(aes(y = IMS_Female_World, color = "IMS_Female_World", group = "IMS_Female_World")) +
labs(title = "International MiGrant Stocks in World (1990-2020)", y = "International MiGrant Stock") +
#200 000 To 200M
scale_y_continuous(labels = scales::label_number(scale = 1e-6, suffix = "M")) +
theme_minimal() +
scale_color_manual(
values = c("IMS_Total_World" = "black", "IMS_Male_World" = "blue", "IMS_Female_World" = "pink"),
labels = c(
"IMS_Total_World" = "Both Sexes Combined", "IMS_Male_World" = "Males", "IMS_Female_World" = "Females")) +
theme(plot.title = element_text(hjust = 0.5), legend.position = "top", legend.title = element_blank())
LINEs
We can follow IMS Evolution, which has become more intense since 2005. STOCKs are balanced in a fair manner between males and females (1990-2005), then leans in favor of men (2005-2020).
We don’t have accurate STOCKs because it’s static, we need to make it interactive. We also have to rescale STOCKs to add “M” suffix on YAxis.
#N°2
LINEs_INTERACTIVE_One <- plot_ly(data = IMS.FDATA_CRs, x = ~Year, y = ~IMS_Total_World,
type = 'scatter', name = 'Both Sexes Combined',
mode = 'lines+markers', line = list(color = 'black'), marker = list(color = 'black'),
text = with(IMS.FDATA_CRs, paste(" Residence:", Residence, "<br>",
"Birth:", Birth, "<br>",
"Year:", Year, "<br>",
"International MiGrant Stock (Both Sexes Combined):",
paste0(format(x = IMS_Total_World, trim = TRUE,
scientific = FALSE, big.mark = " "), "M"))),
hoverinfo = "text") %>%
add_trace(x = ~Year, y = ~IMS_Male_World, name = 'Males', line = list(color = 'blue'), marker = list(color = 'blue'),
text = with(IMS.FDATA_CRs, paste(" Residence:", Residence, "<br>", "Birth:", Birth, "<br>", "Year:", Year, "<br>",
"International MiGrant Stock (Males):",
paste0(format(x = IMS_Male_World, trim = TRUE, scientific = FALSE,
big.mark = " "), "M"))), hoverinfo = "text") %>%
add_trace(x = ~Year, y = ~IMS_Female_World, name = 'Females', line = list(color = 'pink'), marker = list(color = 'pink'),
text = with(IMS.FDATA_CRs, paste(" Residence:", Residence, "<br>", "Birth:", Birth, "<br>", "Year:", Year, "<br>",
"International MiGrant Stock (Females):",
paste0(format(x = IMS_Female_World, trim = TRUE, scientific = FALSE,
big.mark = " "), "M"))), hoverinfo = "text") %>%
layout(title = "International MiGrant Stocks in World (1990-2020)",
xaxis = list(title = "Year"), yaxis = list(title = "International MiGrant Stock"),
legend = list(x = 0.5, y = 1.05, xanchor = "center", orientation = "h"))
LINEs_INTERACTIVE_One
A download button is available. We can see Residence |
Birth | Year | STOCKs in Hover Text. We don’t
need to rescale STOCKs to add “M” suffix on YAxis, we need to do it for
Hover Text.
#N°3
LINEs_INTERACTIVE_Two <- ggplotly(LINEs) %>%
layout(legend = list(title = "", x = 0.5, y = 1.05, xanchor = "center", orientation = "h"))
for (i in 1:length(LINEs_INTERACTIVE_Two$x$data)) {
LINEs_INTERACTIVE_Two$x$data[[i]]$name <- c("Both Sexes Combined", "Males", "Females")[i]}
LINEs_INTERACTIVE_Two
We can’t remove default Hover Text and text ARGu within
aes() in geom_point() does not allow for
modifications to Hover Text. Customs labels have to be redone here.
#N°4
LINEs_INTERACTIVE_THREE <- ggplot(IMS.FDATA_CRs) +
geom_line_interactive(aes(x = Year, y = IMS_Total_World, color = "IMS_Total_World", group = "IMS_Total_World")) +
geom_line_interactive(aes(x = Year, y = IMS_Male_World, color = "IMS_Male_World", group = "IMS_Male_World")) +
geom_line_interactive(aes(x = Year, y = IMS_Female_World, color = "IMS_Female_World", group = "IMS_Female_World")) +
geom_point_interactive(aes(x = Year, y = IMS_Total_World, color = "IMS_Total_World", group = "IMS_Total_World",
tooltip = paste("Residence:", Residence, "<br>Birth:", Birth, "<br>Year:", Year,
"<br>International MiGrant Stock (Both Sexes Combined):",
paste0(format(x = IMS_Total_World, trim = TRUE,
scientific = FALSE, big.mark = " "), "M")))) +
geom_point_interactive(aes(x = Year, y = IMS_Male_World, color = "IMS_Male_World", group = "IMS_Male_World",
tooltip = paste("Residence:", Residence, "<br>Birth:", Birth, "<br>Year:", Year,
"<br>International MiGrant Stock (Males):",
paste0(format(x = IMS_Male_World, trim = TRUE,
scientific = FALSE, big.mark = " "), "M")))) +
geom_point_interactive(aes(x = Year, y = IMS_Female_World, color = "IMS_Female_World", group = "IMS_Female_World",
tooltip = paste("Residence:", Residence, "<br>Birth:", Birth, "<br>Year:", Year,
"<br>International MiGrant Stock (Females):",
paste0(format(x = IMS_Female_World, trim = TRUE,
scientific = FALSE, big.mark = " "), "M")))) +
labs(title = "International MiGrant Stocks in World (1990-2020)", y = "International MiGrant Stock") +
scale_y_continuous(labels = scales::label_number(scale = 1e-6, suffix = "M")) +
theme_minimal() +
scale_color_manual(
values = c("IMS_Total_World" = "black", "IMS_Male_World" = "blue", "IMS_Female_World" = "pink"),
labels = c("IMS_Total_World" = "Both Sexes Combined", "IMS_Male_World" = "Males", "IMS_Female_World" = "Females")) +
theme(plot.title = element_text(hjust = 0.5), legend.position = "top", legend.title = element_blank())
girafe(ggobj = LINEs_INTERACTIVE_THREE)
Function geom_bar_interactive serves as an alternative
to geom_bar to make it interactive. A download button is
also available.
#COLORs
CRs <- c("#339900", "#FF9900", "#003366", "#FF0000", "#663399", "#FFCC00", "#999999")
#N°1
BARs <- ggplot(IMS.FDATA, aes(x = Year, y = IMS_Total, fill = Birth,
#For N°3
text = paste(" Residence:", Residence, "<br>", "Birth:", Birth, "<br>",
"Year:", Year, "<br>", "International MiGrant Stock:",
format(x = IMS_Total, trim = TRUE, scientific = FALSE, big.mark = " ")))) +
#STACKEd
geom_bar(stat = "identity") +
labs(title = "International MiGrant Stocks in World from 6 Areas (1990-2020)",
y = "International MiGrant Stock - Both Sexes Combined") +
scale_y_continuous(labels = scales::label_number(scale = 1e-6, suffix = "M")) +
scale_fill_manual(values = CRs) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), legend.position = "right")
BARs
We can see IMS Global Evolution from 150M in 1990 to \(>\) 250M in 2020. We also notice a more marked evolution in ASIA than in EUROPE.
Birth Areas are in A-Z order, we will have to use factor levels to reorder Birth Areas.
Non-POSITIVE comments are same as for LINES. OCEANIA and NORTHERN AMERICA are too small.
In STACKEd BARs above, we have data for a
Residence-Birth pair and all Year
in a row.
#Residence-Birth/Year To Year/Residence/Birth
IMS.FDATA_INTERACTIVE <- IMS.FDATA %>% group_by(Year, Residence, Birth) %>%
summarise(IMS_Total_SHARE = sum(IMS_Total_SHARE), IMS_Total = sum(IMS_Total))
#Table 1990/World
kable(IMS.FDATA_INTERACTIVE[1:7, c(1:3, 5)], row.names = FALSE,
align = c("l", "l", "l", "c"),
caption = "International MiGrant Stocks in World from 6 Areas in 1990")
| Year | Residence | Birth | IMS_Total |
|---|---|---|---|
| 1990 | WORLD | AFRICA | 20621464 |
| 1990 | WORLD | ASIA | 56957000 |
| 1990 | WORLD | EUROPE | 47730278 |
| 1990 | WORLD | LATIN AMERICA AND THE CARIBBEAN | 15273399 |
| 1990 | WORLD | NORTHERN AMERICA | 2802135 |
| 1990 | WORLD | OCEANIA | 961547 |
| 1990 | WORLD | OTHER | 8640334 |
Now, we need to have data for a Year and all
Residence-Birth pairs in a row.
#N°2
BARs_INTERACTIVE_One <- plot_ly(data = IMS.FDATA_INTERACTIVE, x = ~Year, y = ~IMS_Total,
color = ~Birth, type = "bar", colors = CRs, legendgroup = 'Birth',
text = with(IMS.FDATA_INTERACTIVE, paste(" Residence:", Residence, "<br>",
"Birth:", Birth, "<br>", "Year:", Year, "<br>",
"International MiGrant Stock:",
paste0(format(IMS_Total, trim = TRUE,
scientific = FALSE, big.mark = " ")))),
hoverinfo = "text") %>%
layout(barmode = "stack",
yaxis = list(title = "International MiGrant Stock - Both Sexes Combined"),
title = "International MiGrant Stocks in World from 6 Areas (1990-2020)",
legend = list(title = list(text='<b> Birth </b>'), x=1, y=0.5))
BARs_INTERACTIVE_One
Birth Areas are in Z-A order, we will have to use factor levels to reorder Birth Areas.
#N°3
BARs_INTERACTIVE_Two <- ggplotly(BARs) %>%
layout(title = list(text = "International MiGrant Stocks in World From 6 Areas (1990-2020)", x = 0.5))
BARs_INTERACTIVE_Two
#N°4
BARs_INTERACTIVE_THREE <- ggplot(IMS.FDATA) +
geom_bar_interactive(aes(x = Year, y = IMS_Total, fill = Birth,
tooltip = paste("Residence:", Residence, "<br>Birth:", Birth, "<br>Year:", Year,
"<br>International MiGrant Stock:",
paste0(format(IMS_Total, trim = TRUE, scientific = FALSE, big.mark = " ")))),
position = "stack", stat = "identity") +
labs(title = "International MiGrant Stocks in World from 6 Areas (1990-2020)",
y = "International MiGrant Stock - Both Sexes Combined") +
scale_fill_manual(values = CRs) +
scale_y_continuous(labels = scales::label_number(scale = 1e-6, suffix = "M")) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), legend.position = "right")
girafe(ggobj = BARs_INTERACTIVE_THREE)
We will use plotly or ggplot2 combine with
ggiraph to make our STACKEd BARs and LINEs.
#LibrariesCOMPARISON
LibrariesCOMPARISON <- data.frame(
LIBRARY = c("GGPLOT2", "PLOTLY", "GGPLOTLY", "GGIRAPH"),
POSITIVE = c("EASY | Nice GRAPHICs",
"Interactive | DownLoad Button | Hover Text | Share X-axis",
"Interactive | DownLoad Button",
"Interactive | DownLoad Button | Hover Text | EASY"),
NEGATIVE = c("Static", "Labels Issues", "Labels Issues | Hover Text", "-"))
#Table
datatable(LibrariesCOMPARISON, options = list(scrollX = TRUE, autoWidth = TRUE),
style = "bootstrap", rownames = FALSE, caption = "GRAPHICs' Libraries COMPARISON ")
DATA MANIPULATIONs can take time. We need to do it to avoid load times on client/server side.
There are 3 Dev. Levels from OHRLLS in IMS DATA (see section 3):
We have found method. data from UNSD which lists countries as LDC| LLDC | SIDS. There are 3 variables (LDC | LLDC | SIDS), with a small cross (x) if an area is in one level (an area can be in 2 levels at once). We create a new variable with 6 modalities:
We have rows based on LDC | LLDC | SIDS in IMS DATA. We want to create new ones to retrieve data that follow our new variable (6 modalities):
We need to calculate new rows thanks to countries data. We have method. data from UNSD (2023) with LDC (46) | LLDC (32) | SIDS (53), and we know LDC (47) | LLDC (32) | SIDS (58) in method. from IMS DATA (2020):
Remarks:
#Load XLSX File
Methodo_from_ONU <- read_excel("../Données/REGIONs/ONU/methodo.xlsx") %>%
rename(Area = 9, RCode = 10, ISO2 = 11, ISO3 = 12, LDC = 13, LLDC = 14, SIDS = 15) %>% select(9:15)
#Channel Islands
Methodo_from_ONU <- Methodo_from_ONU %>% filter(!ISO3 %in% c("JEY", "GGY"))
Methodo_from_ONU <- rbind(
Methodo_from_ONU, data.frame(
Area = "Channel Islands", RCode = 830, ISO2 = NA, ISO3 = "CHA", LDC = NA, LLDC = NA, SIDS = NA))
#LDC | LLDC | SIDS => "x"
Methodo_from_ONU[which(Methodo_from_ONU$Area == "Vanuatu"), "LDC"] <- "x"
Areas_in_SIDS <- c(
"Martinique", "Turks and Caicos Islands", "Guadeloupe", "Cayman Islands", "Bermuda", "Bahrain")
Methodo_from_ONU[Methodo_from_ONU$Area %in% Areas_in_SIDS, "SIDS"] <- "x"
#New Variable - DevLevel (5)
Methodo_from_ONU <- Methodo_from_ONU %>%
mutate(DevLevel = case_when(
LDC == 'x' & LLDC == 'x' & SIDS == 'x' ~ 'LDC | LLDC | SIDS',
LDC == 'x' & LLDC == 'x' & is.na(SIDS) ~ 'LDC | LLDC',
LDC == 'x' & is.na(LLDC) & SIDS == 'x' ~ 'LDC | SIDS',
is.na(LDC) & LLDC == 'x' & SIDS == 'x' ~ 'LLDC | SIDS',
LDC == 'x' & is.na(LLDC) & is.na(SIDS) ~ 'LDC',
is.na(LDC) & LLDC == 'x' & is.na(SIDS) ~ 'LLDC',
is.na(LDC) & is.na(LLDC) & SIDS == 'x' ~ 'SIDS'))
##### Residence #####
#IMS.DATA WITH NEW ROWs (0)
IMS.DATA.NEWR <- data.frame(Residence = character(0), ResidenceCode = numeric(0),
Birth = character(0), BirthCode = numeric(0),
Year = character(0),
IMS_Total = numeric(0), IMS_Male = numeric(0), IMS_Female = numeric(0),
stringsAsFactors = FALSE)
#IMS DATA for SelectedBCode and LDC* or LLDC* or SIDS* | LDC/LLDC | LDC/SIDS Countries
DEVT <- function(DATA, LV, IMS, RESIDENCE, RCODE) {
Level <- DATA %>% filter(DevLevel == LV)
fDATA <- IMS %>% filter(ResidenceCode %in% c(Level$RCode), BirthCode == SelectedBCode)
if (nrow(fDATA) == 0) {
fDATA <- data.frame(Residence = RESIDENCE, ResidenceCode = RCODE,
Birth = IMS$Birth[ROW], BirthCode = SelectedBCode,
Year = c(1990, 1995, 2000, 2005, 2010, 2015, 2020),
IMS_Total = 0, IMS_Male = 0, IMS_Female = 0)
} else {
fDATA <- fDATA %>% group_by(Year) %>%
summarize(IMS_Total = sum(IMS_Total), IMS_Male = sum(IMS_Male), IMS_Female = sum(IMS_Female)) %>%
mutate(Residence = RESIDENCE, ResidenceCode = RCODE, Birth = IMS$Birth[ROW], BirthCode = SelectedBCode) %>%
select(5:8, 1:4)
}
return(fDATA)
}
#Loop which calculate new rows
# for (ROW in 1:nrow(IMS.DATA)) {
# #BirthCode done or !done
# SelectedBCode <- IMS.DATA$BirthCode[ROW]
# if (SelectedBCode %in% IMS.DATA.NEWR$BirthCode) {
# # print("Done!")
# } else if (SelectedBCode == 2003) {
# #PROGRESSION
# print(paste0("PROGRESSION: ", ROW, "/", nrow(IMS.DATA)))
#
# #Get IMS DATA for SelectedBCode
# IMS.FDATA <- IMS.DATA %>%
# filter(BirthCode == 2003, Year == c(1990, 1995, 2000, 2005, 2010, 2015, 2020)) %>%
# distinct(ResidenceCode, Year, .keep_all = TRUE)
# IMS.DATA.NEWR <- rbind(IMS.DATA.NEWR, IMS.FDATA)
# } else {
# #PROGRESSION
# print(paste0("PROGRESSION: ", ROW, "/", nrow(IMS.DATA)))
#
# #Get IMS DATA for SelectedBCode
# IMS.FDATA <- IMS.DATA %>%
# filter(BirthCode == SelectedBCode, Year == c(1990, 1995, 2000, 2005, 2010, 2015, 2020)) %>%
# distinct(ResidenceCode, Year, .keep_all = TRUE)
# IMS.DATA.NEWR <- rbind(IMS.DATA.NEWR, IMS.FDATA)
#
# #Get IMS DATA for SelectedBCode and LDC* Countries
# fDATA <- DEVT(DATA = Methodo_from_ONU, LV = "LDC", IMS = IMS.DATA,
# RESIDENCE = "Least Developed Countries* (LDC*)", RCODE = 942)
# IMS.DATA.NEWR <- rbind(IMS.DATA.NEWR, fDATA)
#
# #Get IMS DATA for SelectedBCode and LLDC* Countries
# fDATA <- DEVT(DATA = Methodo_from_ONU, LV = "LLDC", IMS = IMS.DATA,
# RESIDENCE = "Land-Locked Developing Countries* (LLDC*)", RCODE = 1638)
# IMS.DATA.NEWR <- rbind(IMS.DATA.NEWR, fDATA)
#
# #Get IMS DATA for SelectedBCode and SIDS* Countries
# fDATA <- DEVT(DATA = Methodo_from_ONU, LV = "SIDS", IMS = IMS.DATA,
# RESIDENCE = "Small Island Developing States* (SIDS*)", RCODE = 1639)
# IMS.DATA.NEWR <- rbind(IMS.DATA.NEWR, fDATA)
#
# #Get IMS DATA for SelectedBCode and LDC/LLDC Countries
# fDATA <- DEVT(DATA = Methodo_from_ONU, LV = "LDC | LLDC", IMS = IMS.DATA,
# RESIDENCE = "LDC | LLDC", RCODE = 1640)
# IMS.DATA.NEWR <- rbind(IMS.DATA.NEWR, fDATA)
#
# #Get IMS DATA for SelectedBCode and LDC/SIDS Countries
# fDATA <- DEVT(DATA = Methodo_from_ONU, LV = "LDC | SIDS", IMS = IMS.DATA,
# RESIDENCE = "LDC | SIDS", RCODE = 1641)
# IMS.DATA.NEWR <- rbind(IMS.DATA.NEWR, fDATA)
# }
# }
##### Birth #####
#IMS.DATA WITH NEW ROWs (0)
IMS.DATA.NEWRB <- data.frame(Residence = character(0), ResidenceCode = numeric(0),
Birth = character(0), BirthCode = numeric(0),
Year = character(0),
IMS_Total = numeric(0), IMS_Male = numeric(0), IMS_Female = numeric(0),
stringsAsFactors = FALSE)
#IMS DATA for SelectedRCode and LDC* or LLDC* or SIDS* | LDC/LLDC | LDC/SIDS Countries
DEVT <- function(DATA, LV, IMS, BIRTH, BCODE) {
Level <- DATA %>% filter(DevLevel == LV)
fDATA <- IMS %>% filter(ResidenceCode == SelectedRCode, BirthCode %in% c(Level$RCode))
if (nrow(fDATA) == 0) {
fDATA <- data.frame(Residence = IMS$Residence[ROW], ResidenceCode = SelectedRCode,
Birth = BIRTH, BirthCode = BCODE, Year = c(1990, 1995, 2000, 2005, 2010, 2015, 2020),
IMS_Total = 0, IMS_Male = 0, IMS_Female = 0)
} else {
fDATA <- fDATA %>% group_by(Year) %>%
summarize(IMS_Total = sum(IMS_Total), IMS_Male = sum(IMS_Male), IMS_Female = sum(IMS_Female)) %>%
mutate(Residence = IMS$Residence[ROW], ResidenceCode = SelectedRCode, Birth = BIRTH, BirthCode = BCODE) %>%
select(5:8, 1:4)
}
return(fDATA)
}
#Loop which calculate new rows
# for (ROW in 1:nrow(IMS.DATA.NEWR)) {
# #ResidenceCode done or !done
# SelectedRCode <- IMS.DATA.NEWR$ResidenceCode[ROW]
# if (SelectedRCode %in% IMS.DATA.NEWRB$ResidenceCode) {
# # print("Done!")
# } else {
# #PROGRESSION
# print(paste0("PROGRESSION: ", ROW, "/", nrow(IMS.DATA.NEWR)))
#
# #Get IMS DATA for SelectedRCode
# IMS.FDATA <- IMS.DATA.NEWR %>%
# filter(ResidenceCode == SelectedRCode, Year == c(1990, 1995, 2000, 2005, 2010, 2015, 2020)) %>%
# distinct(BirthCode, Year, .keep_all = TRUE)
# IMS.DATA.NEWRB <- rbind(IMS.DATA.NEWRB, IMS.FDATA)
#
# #Get IMS DATA for SelectedRCode and LDC* Countries
# fDATA <- DEVT(DATA = Methodo_from_ONU, LV = "LDC", IMS = IMS.DATA.NEWR,
# BIRTH = "Least Developed Countries* (LDC*)", BCODE = 942)
# IMS.DATA.NEWRB <- rbind(IMS.DATA.NEWRB, fDATA)
#
# #Get IMS DATA for SelectedRCode and LLDC* Countries
# fDATA <- DEVT(DATA = Methodo_from_ONU, LV = "LLDC", IMS = IMS.DATA.NEWR,
# BIRTH = "Land-Locked Developing Countries* (LLDC*)", BCODE = 1638)
# IMS.DATA.NEWRB <- rbind(IMS.DATA.NEWRB, fDATA)
#
# #Get IMS DATA for SelectedRCode and SIDS* Countries
# fDATA <- DEVT(DATA = Methodo_from_ONU, LV = "SIDS", IMS = IMS.DATA.NEWR,
# BIRTH = "Small Island Developing States* (SIDS*)", BCODE = 1639)
# IMS.DATA.NEWRB <- rbind(IMS.DATA.NEWRB, fDATA)
#
# #Get IMS DATA for SelectedRCode and LDC/LLDC Countries
# fDATA <- DEVT(DATA = Methodo_from_ONU, LV = "LDC | LLDC", IMS = IMS.DATA.NEWR,
# BIRTH = "LDC | LLDC", BCODE = 1640)
# IMS.DATA.NEWRB <- rbind(IMS.DATA.NEWRB, fDATA)
#
# #Get IMS DATA for SelectedRCode and LDC/SIDS Countries
# fDATA <- DEVT(DATA = Methodo_from_ONU, LV = "LDC | SIDS", IMS = IMS.DATA.NEWR,
# BIRTH = "LDC | SIDS", BCODE = 1641)
# IMS.DATA.NEWRB <- rbind(IMS.DATA.NEWRB, fDATA)
# }
# }
#Reorder Rows (Residence-Birth)
# URCode <- unique(IMS.DATA.NEWRB$ResidenceCode)
# ORCode <- c(URCode[1:14], 942, 1636, 1638, 1637, 1639, 1640, 1641, URCode[17:282])
# UBCode <- unique(IMS.DATA.NEWRB$BirthCode)
# RUBCode <- c(UBCode[1:14], 942, 1636, 1638, 1637, 1639, 1640, 1641, UBCode[17:282])
# CUBCode <- c(UBCode[1:14], 942, 1636, 1638, 1637, 1639, 1640, 1641, UBCode[17:282], 2003)
# IMS.DATA.NEWRB <- IMS.DATA.NEWRB %>% arrange(match(ResidenceCode, ORCode), match(BirthCode, RUBCode))